#' ---
#' title: "Create Codebooks for Text as Behavior Paper"
#' author: "Omar Wasow"
#' date: "`r Sys.Date()`"
#' description: |
#'   Generates codebooks for variables used in analyses.
#'   Uses dataReporter package to create PDF documentation.
#' ---

# =============================================================================
# Setup
# =============================================================================

library(here)
library(dplyr)
library(dataReporter)

# =============================================================================
# ANES Codebook (2016-2020-2024 Panel)
# =============================================================================

# Load the most comprehensive ANES file (2024 merged includes 2016 and 2020).
# The code below reads the data frame `a24`, which this .Rdata file is
# expected to provide.
load(here("text_data_output", "anes2024_merged.Rdata"))

# Variables used in ANES analyses
anes_vars <- c(
    # Outcome variables - 2016
    "vote_dem16", "vote_rep16", "vote_validated16",
    # Outcome variables - 2020
    "vote_biden20_bin", "vote_trump20_bin", "vote_valid20_weighted",
    # Outcome variables - 2024
    "vote_harris24_bin", "vote_trump24_bin",
    # Party switching outcomes
    "switcher2_20_fct", "switcher24_fct",

    # Text behavior predictors - 2016
    "nchar_align_ihs", "nchar_problems_ihs",
    "nonresp_nchar_all", "nonresp_lddr16", "nonresp_lrdd16",
    "nonresp_lddr16_fct", "nonresp_lrdd16_fct",

    # Text behavior predictors - 2020
    "nchar_problems20_ihs", "nchar_problems20_fct", "nchar_problems20_bin",

    # Demographics and controls - 2016
    "pid3_16", "pid4_16", "ideo7_16",
    "ft_trump_clinton", "racial_resent16", "sexism16", "authorit16",
    "educ16", "age16", "female16", "race16", "income16",
    "pol_attn16", "mode16", "likely_vote16", "reg16_bin",

    # Demographics and controls - 2020
    "pid4_20_fct", "educ20", "age20", "female20",
    "race20_fct", "income20", "pol_attn20", "likely_vote20"
)

# Keep only the requested variables that are present in the data, in the
# order they are listed above.
anes_vars_exist <- anes_vars[anes_vars %in% names(a24)]

# A variable that was renamed or dropped upstream would otherwise disappear
# from the codebook without any notice, so report what was skipped. This is a
# warning rather than an error so the codebook is still built from whatever
# is available.
anes_vars_missing <- setdiff(anes_vars, names(a24))
if (length(anes_vars_missing) > 0) {
    warning(
        "ANES codebook: ", length(anes_vars_missing),
        " requested variable(s) not found in `a24` and omitted: ",
        paste(anes_vars_missing, collapse = ", "),
        call. = FALSE
    )
}

anes_subset <- a24 %>% select(all_of(anes_vars_exist))

# Create ANES codebook: the report source is written to codebook_anes.Rmd in
# text_data_output and PDF output is requested. `replace = TRUE` and
# `openResult = FALSE` are passed so reruns can overwrite earlier files and
# the result is not opened (see dataReporter::makeCodebook()).
makeCodebook(
    data = anes_subset,
    reportTitle = "ANES 2016-2020-2024 Panel Variables",
    file = here("text_data_output", "codebook_anes.Rmd"),
    output = "pdf",
    replace = TRUE,
    openResult = FALSE
)

message("ANES codebook created.")

# =============================================================================
# Afrobarometer Codebook
# =============================================================================

# Load the processed Afrobarometer data. The code below reads the data frame
# `afro`, which this .Rdata file is expected to provide.
load(here("text_data_output", "afrobarometer_processed.Rdata"))

# Variables used in Afrobarometer analyses
afro_vars <- c(
    # Outcome
    "nchar_dem_total",
    # Key predictors
    "dem_importance", "understood_dem", "lang",
    # Controls
    "gender", "educ", "age", "income_proxy", "race5"
)

# Keep only the requested variables that are present in the data, in the
# order they are listed above.
afro_vars_exist <- afro_vars[afro_vars %in% names(afro)]

# Report any requested variable that is absent instead of dropping it
# silently; a warning (not an error) keeps the codebook build going with the
# variables that are available.
afro_vars_missing <- setdiff(afro_vars, names(afro))
if (length(afro_vars_missing) > 0) {
    warning(
        "Afrobarometer codebook: ", length(afro_vars_missing),
        " requested variable(s) not found in `afro` and omitted: ",
        paste(afro_vars_missing, collapse = ", "),
        call. = FALSE
    )
}

afro_subset <- afro %>% select(all_of(afro_vars_exist))

# Create Afrobarometer codebook: the report source is written to
# codebook_afrobarometer.Rmd in text_data_output and PDF output is requested.
# `replace = TRUE` and `openResult = FALSE` are passed so reruns can overwrite
# earlier files and the result is not opened (see
# dataReporter::makeCodebook()).
makeCodebook(
    data = afro_subset,
    reportTitle = "Afrobarometer Variables",
    file = here("text_data_output", "codebook_afrobarometer.Rmd"),
    output = "pdf",
    replace = TRUE,
    openResult = FALSE
)

message("Afrobarometer codebook created.")

# =============================================================================
# KMM/AAP Experiment Codebook
# =============================================================================

# Load the processed AAP experiment data. The code below reads the data frame
# `aap`, which this .Rdata file is expected to provide.
load(here("text_data_output", "aap_processed.Rdata"))

# Variables used in KMM analyses
kmm_vars <- c(
    # Outcomes
    "time_min", "nchar_sum",
    # Treatment and race
    "treatment_cit", "treatment_fct",
    "asian", "white", "asian_white_cond", "asian_white_fct",
    # Demographics
    "female", "age", "ed"
)

# Keep only the requested variables that are present in the data, in the
# order they are listed above.
kmm_vars_exist <- kmm_vars[kmm_vars %in% names(aap)]

# Report any requested variable that is absent instead of dropping it
# silently; a warning (not an error) keeps the codebook build going with the
# variables that are available.
kmm_vars_missing <- setdiff(kmm_vars, names(aap))
if (length(kmm_vars_missing) > 0) {
    warning(
        "KMM codebook: ", length(kmm_vars_missing),
        " requested variable(s) not found in `aap` and omitted: ",
        paste(kmm_vars_missing, collapse = ", "),
        call. = FALSE
    )
}

kmm_subset <- aap %>% select(all_of(kmm_vars_exist))

# Create KMM codebook: the report source is written to codebook_kmm.Rmd in
# text_data_output and PDF output is requested. `replace = TRUE` and
# `openResult = FALSE` are passed so reruns can overwrite earlier files and
# the result is not opened (see dataReporter::makeCodebook()).
makeCodebook(
    data = kmm_subset,
    reportTitle = "KMM Social Exclusion Experiment Variables",
    file = here("text_data_output", "codebook_kmm.Rmd"),
    output = "pdf",
    replace = TRUE,
    openResult = FALSE
)

message("KMM codebook created.")

# =============================================================================
# Summary
# =============================================================================

# Final status report: emit each line with its own message() call, naming the
# PDF codebooks requested by the sections above.
summary_lines <- c(
    "\n=== Codebook Generation Complete ===",
    "Created:",
    "  - text_data_output/codebook_anes.pdf",
    "  - text_data_output/codebook_afrobarometer.pdf",
    "  - text_data_output/codebook_kmm.pdf"
)
for (summary_line in summary_lines) {
    message(summary_line)
}
